open! Core
open! Async

(* PAGER=cat because perf spawns [less] if you get the arguments wrong, and that keeps the
   parent process alive even though it just failed. That, in turn, makes magic-trace stop
   responding to Ctrl+C. *)
let perf_env = `Extend [ "PAGER", "cat" ]
let perf = Env_vars.perf_path

module Record_opts = struct
  type t =
    { multi_thread : bool
    ; full_execution : bool
    ; snapshot_size : Pow2_pages.t option
    ; callgraph_mode : Callgraph_mode.t option
    }

  let param =
    let%map_open.Command multi_thread =
      flag
        "-multi-thread"
        no_arg
        ~doc:
          "Records every thread of an executable, instead of only the thread whose TID \
           is equal to the process' PID.\n\
           Warning: this flag decreases the trace's lookback period because the kernel \
           divides snapshot buffer resources equally across all threads."
    and full_execution =
      flag
        "-full-execution"
        no_arg
        ~doc:
          "Record a program's full execution instead of using a snapshot ring buffer.\n\
           Warning: The trace grows at a rate of hundreds of megabytes per second. The \
           trace viewer may fail to load traces larger than 100M."
    and snapshot_size =
      Pow2_pages.optional_flag
        "-snapshot-size"
        ~doc:
          " Tunes the amount of data captured in a trace. Default: 4M if root or \
           perf_event_paranoid < 0, 256K otherwise. When running with sampling,  \
           defaults to 512K, but cannot be changed. For more info: \
           https://magic-trace.org/w/s"
    and callgraph_mode = Callgraph_mode.param in
    { multi_thread; full_execution; snapshot_size; callgraph_mode }
  ;;
end

let write_perf_dlfilter filename =
  let error = Deferred.Or_error.error_string "Unable to write [perf_dlfilter.so]." in
  try
    match Perf_dlfilter.read "perf_dlfilter.so" with
    | Some data ->
      Out_channel.write_all filename ~data;
      (* Otherwise this is written without executable permission. *)
      Core_unix.chmod filename ~perm:0o775 |> Deferred.Or_error.return
    | None -> error
  with
  | Sys_error _ -> error
;;

let perf_exit_to_or_error = function
  | Ok () | Error (`Signal _) -> Ok ()
  | Error (`Exit_non_zero n) -> Core_unix.Exit.of_code n |> Core_unix.Exit.or_error
;;

(* Same as [Caml.exit] but does not run at_exit handlers *)
external sys_exit : int -> 'a = "caml_sys_exit"

let perf_fork_exec ?env ~prog ~argv () =
  let pr_set_pdeathsig = Or_error.ok_exn Linux_ext.pr_set_pdeathsig in
  match Core_unix.fork () with
  | `In_the_child ->
    pr_set_pdeathsig Signal.kill;
    never_returns
      (try Core_unix.exec ?env ~prog ~argv () with
       | _ -> sys_exit 127)
  | `In_the_parent pid -> pid
;;

let max_sampling_frequency () =
  In_channel.read_all "/proc/sys/kernel/perf_event_max_sample_rate"
  |> String.rstrip (* Strip off newline *)
  |> Int.of_string
;;

module Recording = struct
  module Data = struct
    type t = { callgraph_mode : Callgraph_mode.t option } [@@deriving sexp]
  end

  module Snapshot_when = struct
    type t =
      | Never
      | At_exit
      | Function_call
  end

  module Control = struct
    (* Perf has several mechanisms to cause a snapshot:
       - The `snapshot` control command
       - snapshot-on-exit; exit can occur due to tracee exit, a signal, or the `stop`
         control command.
       - SIGUSR2

       The mechanism we use depends on the user-selected event.

       # Snapshot on function call (breakpoint)
       snapshot-on-exit doesn't apply because it can't support multi-snapshot. Prefer
       ctlfd, fallback to SIGUSR2.

       If a tracee exits soon after hitting the breakpoint, perf may exit before we tell
       it to snapshot. We could fix this in single-snapshot mode by using snapshot-on-exit
       in combination with SIGINT or the stop command. But this is rare enough that we
       don't implement this.

       # Snapshot on Exit
       Practically, this means take a snapshot when the tracee exits or when the user
       sends SIGINT to magic-trace.

       In run mode, it is guaranteed via waitpid that the tracee exits before backend
       snapshotting logic runs. In attach mode, magic-trace doesn't get any notification when
       the tracee exits. So, when we enter [maybe_take_snapshot] any of these could have
       happened:
       1. tracee died, perf has exited (and took a snapshot if snapshot-on-exit)
       2. tracee died, perf is snapshotting due to snapshot-on-exit
       3. tracee alive, perf is nominal (attach mode, magic-trace got SIGINT)

       perf snapshot-on-exit means that we cause perf to exit in the process of
       snapshotting (otherwise we would get an extra unwanted snapshot), so our perf
       shutdown logic becomes redundant. Similarly, if the tracee exits before we reach
       snapshot logic, the snapshot logic is redundant.

       # Implementation

       perf support | MT event | --snapshot=e | snapshot | shutdown | note
       ================================================================================================
       old          | *        | unsupported  | SIGUSR2  | SIGTERM  | tracee exit won't induce snapshot
       snap-on-exit | on-exit  | yes          | SIGINT   | SIGTERM  | one or both signal is redundant
       snap-on-exit | function | no           | SIGUSR2  | SIGTERM  |
       ctlfd        | on-exit  | yes          | stop     | stop     | one or both controls is redundant
       ctlfd        | function | no           | snapshot | stop     |
    *)

    type t =
      | Signals of
          { snapshot : Signal.t
          ; shutdown : Signal.t
          }
      | Ctlfd of
          { ctlfd : Perf_ctlfd.t
          ; snapshot : Perf_ctlfd.Command.t
          ; shutdown : Perf_ctlfd.Command.t
          }

    let control_opt = function
      | Ctlfd { ctlfd; _ } -> Perf_ctlfd.control_opt ctlfd
      | Signals _ -> [], Fn.id
    ;;

    let create ~capabilities ~(snapshot_when : Snapshot_when.t) =
      let select ~at_exit ~function_call =
        match snapshot_when with
        | Never | Function_call -> function_call
        | At_exit -> at_exit
      in
      let perf_snapshot_on_exit =
        Perf_capabilities.(do_intersect capabilities snapshot_on_exit)
      in
      let snapshot_opt =
        match snapshot_when with
        | Never -> []
        | At_exit when perf_snapshot_on_exit -> [ "--snapshot=e" ]
        | At_exit | Function_call -> [ "--snapshot" ]
      in
      let control =
        if Perf_capabilities.(do_intersect capabilities ctlfd)
        then (
          assert perf_snapshot_on_exit;
          let shutdown = Perf_ctlfd.Command.stop in
          let snapshot =
            select
              ~at_exit:Perf_ctlfd.Command.stop
              ~function_call:Perf_ctlfd.Command.snapshot
          in
          Ctlfd { ctlfd = Perf_ctlfd.create (); shutdown; snapshot })
        else if perf_snapshot_on_exit
        then (
          let shutdown = Signal.term in
          let snapshot = select ~at_exit:Signal.int ~function_call:Signal.usr2 in
          Signals { shutdown; snapshot })
        else (
          let shutdown = Signal.term in
          let snapshot = Signal.usr2 in
          Signals { shutdown; snapshot })
      in
      let control_opt, invoke_after_fork = control_opt control in
      control, snapshot_opt @ control_opt, invoke_after_fork
    ;;

    let ignore_perf_exit = function
      | Ok () | Error `Perf_exited -> ()
    ;;

    let take_snapshot t pid =
      match t with
      | Signals { snapshot; _ } -> Signal_unix.send_i snapshot (`Pid pid)
      | Ctlfd { ctlfd; snapshot; _ } ->
        Perf_ctlfd.dispatch_and_block_for_ack ctlfd snapshot |> ignore_perf_exit
    ;;

    let shutdown t pid =
      match t with
      | Signals { shutdown; _ } -> Signal_unix.send_i shutdown (`Pid pid)
      | Ctlfd { ctlfd; shutdown; _ } ->
        Perf_ctlfd.dispatch_and_block_for_ack ctlfd shutdown |> ignore_perf_exit
    ;;
  end

  type t =
    { pid : Pid.t
    ; snapshot_when : Snapshot_when.t
    ; control : Control.t
    }

  let perf_selector_of_trace_scope : Trace_scope.t -> string = function
    | Userspace -> "u"
    | Kernel -> "k"
    | Userspace_and_kernel -> "uk"
  ;;

  let perf_intel_pt_config_of_timer_resolution
    ~capabilities
    (timer_resolution : Timer_resolution.t)
    =
    let timer_resolution =
      match
        ( timer_resolution
        , Perf_capabilities.(do_intersect capabilities configurable_psb_period) )
      with
      | (Normal | High), false ->
        Core.eprintf
          "Warning: This machine has an older generation processor, timing granularity \
           will be ~1us instead of ~10ns. Consider using a newer machine.\n\
           %!";
        Timer_resolution.Low
      | _, _ -> timer_resolution
    in
    match timer_resolution with
    | Low -> Or_error.return ""
    | Normal -> Or_error.return "cyc=1,cyc_thresh=1,mtc_period=0"
    | High -> Or_error.return "cyc=1,cyc_thresh=1,mtc_period=0,noretcomp=1"
    | Sample _ ->
      Or_error.error_string
        "[-timer-resolution Sample] can only be used in sampling mode. (Did you forget \
         [-sampling]?)"
    | Custom { cyc; cyc_thresh; mtc; mtc_period; noretcomp; psb_period } ->
      let make_config key = function
        | None -> None
        | Some value -> Some [%string "%{key}=%{value#Int}"]
      in
      [ make_config "cyc" (Option.map ~f:Bool.to_int cyc)
      ; make_config "cyc_thresh" cyc_thresh
      ; make_config "mtc" (Option.map ~f:Bool.to_int mtc)
      ; make_config "mtc_period" mtc_period
      ; make_config "noretcomp" (Option.map ~f:Bool.to_int noretcomp)
      ; make_config "psb_period" psb_period
      ]
      |> List.filter_opt
      |> String.concat ~sep:","
      |> Or_error.return
  ;;

  let perf_cycles_config_of_timer_resolution (timer_resolution : Timer_resolution.t) =
    match timer_resolution with
    | Low -> Or_error.return "freq=1000"
    | Normal -> Or_error.return "freq=10000"
    | High -> Or_error.return [%string "freq=%{max_sampling_frequency ()#Int}"]
    | Sample { freq } -> Or_error.return [%string "freq=%{freq#Int}"]
    | Custom _ ->
      Or_error.error_string
        "[-timer-resolution Custom] can only be used with Intel PT. (Are you running on \
         a physical Intel machine without [-sampling]?)"
  ;;

  let perf_config_of_extra_events ~selector extra_events =
    List.map
      extra_events
      ~f:(fun ({ when_to_sample; name; precision } : Collection_mode.Event.t) ->
        let precision_selector =
          match precision with
          | Arbitrary_skid -> ""
          | Constant_skid -> "p"
          | Request_zero_skid -> "pp"
          | Zero_skid -> "ppp"
          | Maximum_possible -> "P"
        in
        match when_to_sample with
        | Period period ->
          [%string
            "%{name#Collection_mode.Event.Name}/period=%{period#Int}/%{selector}%{precision_selector}"]
        | Frequency freq ->
          [%string
            "%{name#Collection_mode.Event.Name}/freq=%{freq#Int}/%{selector}%{precision_selector}"])
  ;;

  let perf_args_of_collection_mode
    ~capabilities
    ~timer_resolution
    ~trace_scope
    (collection_mode : Collection_mode.t)
    =
    let selector = perf_selector_of_trace_scope trace_scope in
    let%map.Or_error primary_event =
      match collection_mode with
      | Intel_processor_trace _ ->
        let%map.Or_error intel_pt_config =
          perf_intel_pt_config_of_timer_resolution ~capabilities timer_resolution
        in
        [%string "intel_pt/%{intel_pt_config}/%{selector}"]
      | Stacktrace_sampling _ ->
        let%map.Or_error cycles_config =
          perf_cycles_config_of_timer_resolution timer_resolution
        in
        [%string "cycles/%{cycles_config}/%{selector}"]
    in
    let extra_events =
      perf_config_of_extra_events ~selector (Collection_mode.extra_events collection_mode)
    in
    let arg_string = String.concat ~sep:"," (primary_event :: extra_events) in
    [ [%string "--event=%{arg_string}"] ]
  ;;

  let init_record_dir record_dir =
    Core_unix.mkdir_p record_dir;
    Sys.readdir record_dir
    |> Deferred.bind
         ~f:
           (Deferred.Array.iter ~how:`Sequential ~f:(fun file ->
              if String.is_prefix file ~prefix:"perf.data"
              then Sys.remove (record_dir ^/ file)
              else Deferred.return ()))
  ;;

  let attach_and_record
    { Record_opts.multi_thread; full_execution; snapshot_size; callgraph_mode }
    ~debug_print_perf_commands
    ~(subcommand : Subcommand.t)
    ~(when_to_snapshot : When_to_snapshot.t)
    ~(trace_scope : Trace_scope.t)
    ~multi_snapshot
    ~(timer_resolution : Timer_resolution.t)
    ~record_dir
    ~(collection_mode : Collection_mode.t)
    pids
    =
    let%bind () = init_record_dir record_dir in
    let%bind capabilities = Perf_capabilities.detect_exn () in
    let%bind.Deferred.Or_error () =
      match trace_scope, Perf_capabilities.(do_intersect capabilities kernel_tracing) with
      | Userspace, _ | _, true -> return (Ok ())
      | (Kernel | Userspace_and_kernel), false ->
        if not Env_vars.perf_is_privileged
        then
          Deferred.Or_error.error_string
            "magic-trace must be run as root in order to trace the kernel"
        else return (Ok ())
    in
    (match when_to_snapshot, subcommand with
     | Magic_trace_or_the_application_terminates, Run ->
       if not Perf_capabilities.(do_intersect capabilities snapshot_on_exit)
       then
         printf
           "Warning: magic-trace will only be able to snapshot when magic-trace is \
            Ctrl+C'd, not when the application it's running ends. If that application \
            ends before magic-trace can snapshot it, the resulting trace will be empty. \
            The ability to snapshot when an application terminates was added to perf's \
            userspace tools in version 5.4. For more information, see:\n\
            https://github.com/janestreet/magic-trace/wiki/Supported-platforms,-programming-languages,-and-runtimes#supported-perf-versions\n\
            %!"
     | Application_calls_a_function _, _ | _, Attach -> ());
    (* CR-someday alamoreaux: [--per-thread] is an important argument here.
       However perf fails with an invalid argument to mmap if [--per-thread] is
       given as well as additional events to sample. Without [--per-thread], you
       can end up with traces have gaps in time if a process was switching
       between CPUs.

       We would allow this flag always if perf didn't crash. Even then, it might
       be worth having magic-trace potentially be able to handle / filter the
       trace better. *)
    let per_thread_opts =
      match Collection_mode.extra_events collection_mode with
      | [] -> [ "--per-thread" ]
      | _ -> []
    in
    let thread_opts =
      match multi_thread with
      | false -> List.concat [ per_thread_opts; [ "-t" ] ]
      | true -> [ "-p" ]
    in
    let pid_opt = [ List.map pids ~f:Pid.to_string |> String.concat ~sep:"," ] in
    let%bind.Deferred.Or_error selected_callgraph_mode =
      let open Deferred.Or_error.Let_syntax in
      match collection_mode with
      | Intel_processor_trace _ ->
        (match callgraph_mode with
         | None -> return None
         | Some _ ->
           Deferred.Or_error.error_string
             "[-callgraph-mode] is only configurable when running magic-trace with \
              sampling.")
      | Stacktrace_sampling _ ->
        (match
           ( callgraph_mode
           , Perf_capabilities.(do_intersect capabilities last_branch_record) )
         with
         (* We choose to default to dwarf if lbr is not available. This is
            because dwarf will work on any setup, while frame pointers requires
            compilation with [-fno-omit-frame-pointers]. Although decoding is
            slow and perf.data file sizes are larger. *)
         | None, false ->
           Core.eprintf
             "Warning: [-callgraph-mode] is defaulting to [Dwarf] which may have high \
              overhead and decoding time. For more info: https://magic-trace.org/w/b\n";
           return (Some Callgraph_mode.Dwarf)
         | None, true ->
           Core.eprintf
             "Warning: [-callgraph-mode] is defaulting to [Last_branch_record] which may \
              lose data and has limited callstack depth. For more info: \
              https://magic-trace.org/w/b\n";
           return (Some (Callgraph_mode.Last_branch_record { stitched = true }))
         | Some (Last_branch_record _), false ->
           Deferred.Or_error.error_string
             "[-callgraph-mode Last_branch_record] is only supported on an Intel machine \
              which supports LBR. Try passing [Frame_pointers] or [Dwarf] instead."
         | Some mode, false -> return (Some mode)
         | Some mode, true -> return (Some mode))
    in
    let%bind.Deferred.Or_error event_opts =
      perf_args_of_collection_mode
        ~capabilities
        ~timer_resolution
        ~trace_scope
        collection_mode
      |> Deferred.return
    in
    let kcore_opts =
      if Env_vars.perf_no_kcore
      then []
      else (
        match
          ( collection_mode
          , trace_scope
          , Perf_capabilities.(do_intersect capabilities kcore) )
        with
        | Intel_processor_trace _, Userspace, _ | Stacktrace_sampling _, _, _ -> []
        | Intel_processor_trace _, (Kernel | Userspace_and_kernel), true -> [ "--kcore" ]
        | Intel_processor_trace _, (Kernel | Userspace_and_kernel), false ->
          (* Strictly speaking, we could recreate tools/perf/perf-with-kcore.sh
             here instead of bailing. But that's tricky, and upgrading to a newer
             perf is easier. *)
          Core.eprintf
            "Warning: old perf version detected! perf userspace tools v5.5 contain an \
             important feature, kcore, that make decoding kernel traces more reliable. \
             In our experience, tracing the kernel mostly works without this feature, \
             but you may run into problems if you're trying to trace through \
             self-modifying code (the kernel may do this more than you think). Install a \
             perf version >= 5.5 to avoid this.\n\
             %!";
          [])
    in
    let snapshot_size_opt =
      match snapshot_size, collection_mode with
      | Some snapshot_size, Intel_processor_trace _ ->
        [ [%string "-m,%{Pow2_pages.num_pages snapshot_size#Int}"] ]
      | Some _, Stacktrace_sampling _ ->
        Core.eprintf
          "Warning: -snapshot-size is ignored when not running with Intel PT.\n";
        []
      | None, Intel_processor_trace _ | None, Stacktrace_sampling _ -> []
    in
    let snapshot_when : Snapshot_when.t =
      if full_execution
      then Never
      else (
        match when_to_snapshot with
        | Magic_trace_or_the_application_terminates -> Snapshot_when.At_exit
        | Application_calls_a_function _ -> Function_call)
    in
    let control, control_opt, invoke_after_fork =
      match collection_mode with
      | Intel_processor_trace _ -> Control.create ~capabilities ~snapshot_when
      | Stacktrace_sampling _ ->
        (* We don't take perf AUX snapshots in stacktrace sampling mode *)
        Control.create ~capabilities ~snapshot_when:Never
    in
    let overwrite_opts =
      match collection_mode, full_execution with
      | Stacktrace_sampling _, false -> [ "--overwrite" ]
      | Intel_processor_trace _, false | _, true -> []
    in
    let switch_opts =
      match multi_snapshot with
      | true -> [ "--switch-output=signal" ]
      | false -> []
    in
    let argv =
      List.concat
        [ [ perf; "record"; "-o"; record_dir ^/ "perf.data"; "--timestamp" ]
        ; event_opts
        ; overwrite_opts
        ; switch_opts
        ; thread_opts
        ; pid_opt
        ; control_opt
        ; kcore_opts
        ; snapshot_size_opt
        ; Callgraph_mode.to_perf_record_args selected_callgraph_mode
        ]
    in
    if debug_print_perf_commands then Core.printf "%s\n%!" (String.concat ~sep:" " argv);
    (* Perf prints output we don't care about and --quiet doesn't work for some reason *)
    let perf_pid = perf_fork_exec ~env:perf_env ~prog:perf ~argv () in
    (* This detaches the perf process from our "process group" but not our session. This
       makes it so that when Ctrl-C is sent to magic_trace in the terminal to end an attach
       session, it doesn't also send SIGINT to the perf process, allowing us to send it a
       SIGUSR2 first to get it to capture a snapshot before exiting. *)
    Core_unix.setpgid ~of_:perf_pid ~to_:perf_pid;
    invoke_after_fork ();
    let%map () = Async.Clock_ns.after (Time_ns.Span.of_ms 500.0) in
    (* Check that the process hasn't failed after waiting, because there's no point pausing
       to do recording if we've already failed. *)
    let res = Core_unix.wait_nohang (`Pid perf_pid) in
    let%map.Or_error () =
      match res with
      | Some (_, exit) -> perf_exit_to_or_error exit
      | _ -> Ok ()
    in
    ( { pid = perf_pid; snapshot_when; control }
    , { Data.callgraph_mode = selected_callgraph_mode } )
  ;;

  let maybe_take_snapshot t ~source =
    match t.snapshot_when, source with
    (* [Never] only comes up in [-full-execution] mode. In that mode, perf always gives a
       complete trace; there's no snapshotting. *)
    | Never, _ -> ()
    (* Do not snapshot at the end of a program if the user has set up a trigger symbol. *)
    | Function_call, `ctrl_c -> ()
    (* This shouldn't happen unless there was a bug elsewhere. It would imply that a trigger
       symbol was hit when there is no trigger symbol configured. *)
    | At_exit, `function_call -> ()
    (* Trigger symbol was hit, and we're configured to look for them. *)
    | Function_call, `function_call -> Control.take_snapshot t.control t.pid
    (* Ctrl-C was hit, and we're configured to look for that. *)
    | At_exit, `ctrl_c -> Control.take_snapshot t.control t.pid
  ;;

  let finish_recording t =
    Control.shutdown t.control t.pid;
    (* This should usually be a signal exit, but we don't really care, if it didn't
       produce a good perf.data file the next step will fail.

       [Monitor.try_with] because [waitpid] raises if perf exited before we get here. *)
    match%map.Deferred Monitor.try_with (fun () -> Async_unix.Unix.waitpid t.pid) with
    | Ok res -> perf_exit_to_or_error res
    | Error _exn -> Ok ()
  ;;
end

module Decode_opts = struct
  type t = unit

  let param = Command.Param.return ()
end

let decode_events
  ?perf_maps
  ?(filter_same_symbol_jumps = true)
  ~debug_print_perf_commands
  ~(recording_data : Recording.Data.t option)
  ~record_dir
  ~(collection_mode : Collection_mode.t)
  ()
  =
  let%bind capabilities = Perf_capabilities.detect_exn () in
  let%bind.Deferred.Or_error dlfilter_opts =
    match
      ( Perf_capabilities.(do_intersect capabilities dlfilter)
      , collection_mode
      , Env_vars.no_dlfilter || not filter_same_symbol_jumps )
    with
    | true, Intel_processor_trace _, false ->
      let filename = record_dir ^/ "perf_dlfilter.so" in
      let%map.Deferred.Or_error () = write_perf_dlfilter filename in
      [ "--dlfilter"; filename ]
    | false, _, _ | true, Stacktrace_sampling _, _ | true, Intel_processor_trace _, true
      -> Deferred.Or_error.return []
  in
  let%bind files =
    Sys.readdir record_dir
    >>| Array.to_list
    >>| List.filter ~f:(String.is_prefix ~prefix:"perf.data")
  in
  let%map result =
    Deferred.List.map files ~how:`Sequential ~f:(fun perf_data_file ->
      let itrace_opts =
        match collection_mode with
        | Intel_processor_trace _ -> [ "--itrace=bep" ]
        | Stacktrace_sampling _ -> []
      in
      let fields_opts =
        match collection_mode with
        | Intel_processor_trace _ ->
          [ "-F"; "pid,tid,time,flags,ip,addr,sym,symoff,synth,dso,event,period" ]
        | Stacktrace_sampling _ -> [ "-F"; "pid,tid,time,ip,sym,symoff,dso,event,period" ]
      in
      let args =
        List.concat
          [ [ "script"; "-i"; record_dir ^/ perf_data_file; "--ns" ]
          ; itrace_opts
          ; fields_opts
          ; dlfilter_opts
          ; Option.map recording_data ~f:(fun recording_data ->
              Callgraph_mode.to_perf_script_args recording_data.callgraph_mode)
            |> Option.value ~default:[]
          ]
      in
      if debug_print_perf_commands
      then Core.printf "%s %s\n%!" perf (String.concat ~sep:" " args);
      (* CR-someday tbrindus: this should be switched over to using
         [perf_fork_exec] to avoid the [perf script] process from outliving
         the parent. *)
      let%map perf_script_proc = Process.create_exn ~env:perf_env ~prog:perf ~args () in
      let line_pipe = Process.stdout perf_script_proc |> Reader.lines in
      don't_wait_for
        (Reader.transfer
           (Process.stderr perf_script_proc)
           (Writer.pipe (force Writer.stderr)));
      let events = Perf_decode.to_events ?perf_maps line_pipe in
      let close_result =
        let%map exit_or_signal = Process.wait perf_script_proc in
        perf_exit_to_or_error exit_or_signal
      in
      events, close_result)
  in
  let events = List.map result ~f:(fun (events, _close_result) -> events) in
  (* Force [close_result] to wait on [Pipe.t]s in order. *)
  let close_result =
    List.map result ~f:(fun (_events, close_result) -> close_result)
    |> Deferred.List.fold ~init:(Ok ()) ~f:(fun acc close_result ->
      let%bind.Deferred.Or_error () = close_result in
      Deferred.return acc)
  in
  Ok { Decode_result.events; close_result }
;;
